{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## 2.17 \u5728\u5b57\u7b26\u4e32\u4e2d\u5904\u7406html\u548cxml\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "### \u95ee\u9898\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u4f60\u60f3\u5c06HTML\u6216\u8005XML\u5b9e\u4f53\u5982 &entity; \u6216 &#code; \u66ff\u6362\u4e3a\u5bf9\u5e94\u7684\u6587\u672c\u3002\n\u518d\u8005\uff0c\u4f60\u9700\u8981\u8f6c\u6362\u6587\u672c\u4e2d\u7279\u5b9a\u7684\u5b57\u7b26(\u6bd4\u5982<, >, \u6216 &)\u3002"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "### \u89e3\u51b3\u65b9\u6848\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u5982\u679c\u4f60\u60f3\u66ff\u6362\u6587\u672c\u5b57\u7b26\u4e32\u4e2d\u7684 \u2018<\u2019 \u6216\u8005 \u2018>\u2019 \uff0c\u4f7f\u7528 html.escape() \u51fd\u6570\u53ef\u4ee5\u5f88\u5bb9\u6613\u7684\u5b8c\u6210\u3002\u6bd4\u5982\uff1a"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "import html\n\n# Sample text containing markup characters and double quotes\ns = 'Elements are written as \"<tag>text</tag>\".'\nprint(s)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Escape &, < and >; with the default quote=True the quote characters are escaped too\nescaped = html.escape(s)\nprint(escaped)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Leave quote characters as they are; only &, < and > get escaped\nunquoted = html.escape(s, quote=False)\nprint(unquoted)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u5982\u679c\u4f60\u6b63\u5728\u5904\u7406\u7684\u662fASCII\u6587\u672c\uff0c\u5e76\u4e14\u60f3\u5c06\u975eASCII\u6587\u672c\u5bf9\u5e94\u7684\u7f16\u7801\u5b9e\u4f53\u5d4c\u5165\u8fdb\u53bb\uff0c\n\u53ef\u4ee5\u7ed9\u67d0\u4e9bI/O\u51fd\u6570\u4f20\u9012\u53c2\u6570 errors='xmlcharrefreplace' \u6765\u8fbe\u5230\u8fd9\u4e2a\u76ee\u7684\u3002\u6bd4\u5982\uff1a"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Characters that ASCII cannot represent are emitted as numeric character references (&#NNN;)\ns = 'Spicy Jalape\u00f1o'\ns.encode(encoding='ascii', errors='xmlcharrefreplace')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u4e3a\u4e86\u66ff\u6362\u6587\u672c\u4e2d\u7684\u7f16\u7801\u5b9e\u4f53\uff0c\u4f60\u9700\u8981\u4f7f\u7528\u53e6\u5916\u4e00\u79cd\u65b9\u6cd5\u3002\n\u5982\u679c\u4f60\u6b63\u5728\u5904\u7406HTML\u6216\u8005XML\u6587\u672c\uff0c\u8bd5\u7740\u5148\u4f7f\u7528\u4e00\u4e2a\u5408\u9002\u7684HTML\u6216\u8005XML\u89e3\u6790\u5668\u3002\n\u901a\u5e38\u60c5\u51b5\u4e0b\uff0c\u8fd9\u4e9b\u5de5\u5177\u4f1a\u81ea\u52a8\u66ff\u6362\u8fd9\u4e9b\u7f16\u7801\u503c\uff0c\u4f60\u65e0\u9700\u62c5\u5fc3\u3002"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u6709\u65f6\u5019\uff0c\u5982\u679c\u4f60\u63a5\u6536\u5230\u4e86\u4e00\u4e9b\u542b\u6709\u7f16\u7801\u503c\u7684\u539f\u59cb\u6587\u672c\uff0c\u9700\u8981\u624b\u52a8\u53bb\u505a\u66ff\u6362\uff0c\n\u901a\u5e38\u4f60\u53ea\u9700\u8981\u4f7f\u7528HTML\u6216\u8005XML\u89e3\u6790\u5668\u7684\u4e00\u4e9b\u76f8\u5173\u5de5\u5177\u51fd\u6570/\u65b9\u6cd5\u5373\u53ef\u3002\u6bd4\u5982\uff1a"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "import html\n\ns = 'Spicy &quot;Jalape&#241;o&quot.'\n# HTMLParser.unescape() was deprecated in Python 3.4 and removed in 3.9;\n# html.unescape() is the supported replacement. It also resolves the\n# trailing '&quot' that is missing its semicolon.\nhtml.unescape(s)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "from xml.sax.saxutils import unescape\n\n# Turn the &gt; entities back into literal '>' characters\nt = 'The prompt is &gt;&gt;&gt;'\nunescape(t)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "### \u8ba8\u8bba\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u5728\u751f\u6210HTML\u6216\u8005XML\u6587\u672c\u7684\u65f6\u5019\uff0c\u5982\u4f55\u6b63\u786e\u7684\u8f6c\u6362\u7279\u6b8a\u6807\u8bb0\u5b57\u7b26\u662f\u4e00\u4e2a\u5f88\u5bb9\u6613\u88ab\u5ffd\u89c6\u7684\u7ec6\u8282\u3002\n\u7279\u522b\u662f\u5f53\u4f60\u4f7f\u7528 print() \u51fd\u6570\u6216\u8005\u5176\u4ed6\u5b57\u7b26\u4e32\u683c\u5f0f\u5316\u6765\u4ea7\u751f\u8f93\u51fa\u7684\u65f6\u5019\u3002\n\u4f7f\u7528\u50cf html.escape() \u7684\u5de5\u5177\u51fd\u6570\u53ef\u4ee5\u5f88\u5bb9\u6613\u7684\u89e3\u51b3\u8fd9\u7c7b\u95ee\u9898\u3002"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\u5982\u679c\u4f60\u60f3\u4ee5\u5176\u4ed6\u65b9\u5f0f\u5904\u7406\u6587\u672c\uff0c\u8fd8\u6709\u4e00\u4e9b\u5176\u4ed6\u7684\u5de5\u5177\u51fd\u6570\u6bd4\u5982 xml.sax.saxutils.unescape() \u53ef\u4ee5\u5e2e\u52a9\u4f60\u3002\n\u7136\u800c\uff0c\u4f60\u5e94\u8be5\u5148\u8c03\u7814\u6e05\u695a\u600e\u6837\u4f7f\u7528\u4e00\u4e2a\u5408\u9002\u7684\u89e3\u6790\u5668\u3002\n\u6bd4\u5982\uff0c\u5982\u679c\u4f60\u5728\u5904\u7406HTML\u6216XML\u6587\u672c\uff0c\n\u4f7f\u7528\u67d0\u4e2a\u89e3\u6790\u6a21\u5757\u6bd4\u5982 html.parser \u6216 xml.etree.ElementTree \u5df2\u7ecf\u5e2e\u4f60\u81ea\u52a8\u5904\u7406\u4e86\u76f8\u5173\u7684\u66ff\u6362\u7ec6\u8282\u3002"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.1"
    },
    "toc": {
      "base_numbering": 1,
      "nav_menu": {},
      "number_sections": true,
      "sideBar": true,
      "skip_h1_title": true,
      "title_cell": "Table of Contents",
      "title_sidebar": "Contents",
      "toc_cell": false,
      "toc_position": {},
      "toc_section_display": true,
      "toc_window_display": true
    }
  },
  "nbformat": 4,
  "nbformat_minor": 2
}